library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
1.Read the data
setwd("/Users/stevenqin/Desktop/5008 project")
h <- readRDS("hsioptionsfutures.rds")
hsi <- read_csv("HSI.csv")
## Rows: 494 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
h
## # A tibble: 1,098,101 × 11
## Date Series Market Market…¹ Commo…² Commo…³ Curre…⁴ Settl…⁵ Previ…⁶
## <date> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2020-01-02 HSIF0 HSI HANG SE… HSI HANG S… HKD 28606 28270
## 2 2020-01-02 HSI20400A0 HSI HANG SE… HSI HANG S… HKD 8206 7870
## 3 2020-01-02 HSI20600A0 HSI HANG SE… HSI HANG S… HKD 8006 7670
## 4 2020-01-02 HSI20800A0 HSI HANG SE… HSI HANG S… HKD 7806 7470
## 5 2020-01-02 HSI21000A0 HSI HANG SE… HSI HANG S… HKD 7611 7271
## 6 2020-01-02 HSI21200A0 HSI HANG SE… HSI HANG S… HKD 7406 7071
## 7 2020-01-02 HSI21400A0 HSI HANG SE… HSI HANG S… HKD 7206 6871
## 8 2020-01-02 HSI21600A0 HSI HANG SE… HSI HANG S… HKD 7006 6671
## 9 2020-01-02 HSI21800A0 HSI HANG SE… HSI HANG S… HKD 6806 6471
## 10 2020-01-02 HSI22000A0 HSI HANG SE… HSI HANG S… HKD 6606 6271
## # … with 1,098,091 more rows, 2 more variables: Difference <dbl>,
## # Impliedvolatility <dbl>, and abbreviated variable names ¹MarketName,
## # ²Commodity, ³CommodityName, ⁴CurrencyCode, ⁵SettlementPrice,
## # ⁶PreviousSettlementPrice
hsi
## # A tibble: 494 × 7
## Date Open High Low Close `Adj Close` Volume
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-01-02 28249. 28544. 28246. 28544. 28544. 1262732800
## 2 2020-01-03 28828. 28883. 28428. 28452. 28452. 1797904800
## 3 2020-01-06 28326. 28368. 28054. 28226. 28226. 1793426600
## 4 2020-01-07 28353. 28473. 28264. 28322. 28322. 1302687200
## 5 2020-01-08 28000. 28199. 27858. 28088. 28088. 1709241600
## 6 2020-01-09 28368. 28561 28326. 28561 28561 1692786200
## 7 2020-01-10 28665. 28665. 28504. 28638. 28638. 1448401000
## 8 2020-01-13 28772. 28971. 28672. 28955. 28955. 1765055700
## 9 2020-01-14 29150. 29150. 28790. 28885. 28885. 1643504700
## 10 2020-01-15 28891. 28973. 28619. 28774. 28774. 1240120700
## # … with 484 more rows
hdate <- hsi$Date
dtotal <- data.frame(NULL)
for(i in 1:length(hdate)){
dh <- h %>% filter(Date == hdate[i]) %>% mutate(ProductPrice = hsi$`Adj Close`[i])
dtotal <- rbind(dtotal,dh)
}
dtotal
## # A tibble: 1,091,575 × 12
## Date Series Market Market…¹ Commo…² Commo…³ Curre…⁴ Settl…⁵ Previ…⁶
## <date> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 2020-01-02 HSIF0 HSI HANG SE… HSI HANG S… HKD 28606 28270
## 2 2020-01-02 HSI20400A0 HSI HANG SE… HSI HANG S… HKD 8206 7870
## 3 2020-01-02 HSI20600A0 HSI HANG SE… HSI HANG S… HKD 8006 7670
## 4 2020-01-02 HSI20800A0 HSI HANG SE… HSI HANG S… HKD 7806 7470
## 5 2020-01-02 HSI21000A0 HSI HANG SE… HSI HANG S… HKD 7611 7271
## 6 2020-01-02 HSI21200A0 HSI HANG SE… HSI HANG S… HKD 7406 7071
## 7 2020-01-02 HSI21400A0 HSI HANG SE… HSI HANG S… HKD 7206 6871
## 8 2020-01-02 HSI21600A0 HSI HANG SE… HSI HANG S… HKD 7006 6671
## 9 2020-01-02 HSI21800A0 HSI HANG SE… HSI HANG S… HKD 6806 6471
## 10 2020-01-02 HSI22000A0 HSI HANG SE… HSI HANG S… HKD 6606 6271
## # … with 1,091,565 more rows, 3 more variables: Difference <dbl>,
## # Impliedvolatility <dbl>, ProductPrice <dbl>, and abbreviated variable names
## # ¹MarketName, ²Commodity, ³CommodityName, ⁴CurrencyCode, ⁵SettlementPrice,
## # ⁶PreviousSettlementPrice
place <- data.frame(seperate=which(nchar(dtotal$Series)==5|nchar(dtotal$Series)==8))
lastnumber <- data.frame(seperate=dim(dtotal)[1])
place <- rbind(place,lastnumber)
dim(dtotal)
## [1] 1091575 12
colnames(dtotal)
## [1] "Date" "Series"
## [3] "Market" "MarketName"
## [5] "Commodity" "CommodityName"
## [7] "CurrencyCode" "SettlementPrice"
## [9] "PreviousSettlementPrice" "Difference"
## [11] "Impliedvolatility" "ProductPrice"
unique(dtotal$Market)
## [1] "HSI" "WK1"
unique(dtotal$MarketName)
## [1] "HANG SENG FUTURES & OPTIONS" "WEEKLY HANG SENG INDEX OPTIONS"
unique(dtotal$Commodity)
## [1] "HSI"
unique(dtotal$CurrencyCode)
## [1] "HKD"
weekly = dtotal%>% filter(MarketName=="WEEKLY HANG SENG INDEX OPTIONS")
weekly$Maturity <- ave(weekly$Date, weekly$Series, FUN = max)
weekly <- weekly %>% filter(nchar(Series)==13) %>% filter(MarketName=="WEEKLY HANG SENG INDEX OPTIONS") %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,Maturity,timeToExpiration)
monthly = dtotal%>% filter(MarketName=="HANG SENG FUTURES & OPTIONS")
monthly$Maturity <- ave(monthly$Date, monthly$Series, FUN = max)
monthly <- monthly %>% filter(nchar(Series)==10) %>% filter(MarketName=="HANG SENG FUTURES & OPTIONS") %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,Maturity,timeToExpiration)
putorcall=substr(weekly$Series,9,9)
weekly=cbind(weekly,putorcall)
call_indicator = list("A","B","C","D","E","F","G","H","I","J","K","L")
put_indicator = list("M","N","O","P","Q","R","S","T","U","V","W","X")
dcall1 <- weekly %>% filter(putorcall %in% call_indicator) %>% filter(Impliedvolatility != 0) %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)
dput1 <- weekly %>% filter(putorcall %in% put_indicator) %>% filter(Impliedvolatility != 0)%>% mutate(strikePrice=as.numeric(substr(Series,4,8)))%>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)
otmp_1W <- dput1 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=0 & timeToExpiration<=7) %>% group_by(Date) %>% summarise(avg_otmp_1W=mean(Impliedvolatility))
otmp_1W
## # A tibble: 472 × 2
## Date avg_otmp_1W
## <date> <dbl>
## 1 2020-01-02 65.9
## 2 2020-01-03 25.5
## 3 2020-01-06 31.5
## 4 2020-01-07 37.1
## 5 2020-01-08 42.4
## 6 2020-01-09 67.0
## 7 2020-01-10 26.0
## 8 2020-01-13 35.4
## 9 2020-01-14 38.2
## 10 2020-01-15 48.0
## # … with 462 more rows
atmc_1W <- dcall1 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=0 & timeToExpiration<=7) %>% group_by(Date) %>% summarise(avg_atmc_1W =mean(Impliedvolatility))
atmc_1W
## # A tibble: 472 × 2
## Date avg_atmc_1W
## <date> <dbl>
## 1 2020-01-02 19.5
## 2 2020-01-03 16.4
## 3 2020-01-06 19.9
## 4 2020-01-07 17.7
## 5 2020-01-08 22.2
## 6 2020-01-09 19.4
## 7 2020-01-10 14.5
## 8 2020-01-13 17.8
## 9 2020-01-14 19.6
## 10 2020-01-15 18.8
## # … with 462 more rows
otmp_2W <- dput1 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=8&timeToExpiration<=14) %>% group_by(Date) %>% summarise(avg_otmp_2W = mean(Impliedvolatility))
otmp_2W
## # A tibble: 465 × 2
## Date avg_otmp_2W
## <date> <dbl>
## 1 2020-01-02 24.6
## 2 2020-01-03 15.7
## 3 2020-01-06 23.8
## 4 2020-01-07 24.1
## 5 2020-01-08 25.6
## 6 2020-01-09 25.4
## 7 2020-01-10 16.4
## 8 2020-01-13 21.2
## 9 2020-01-14 21.1
## 10 2020-01-15 21.7
## # … with 455 more rows
atmc_2W <- dcall1 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=8&timeToExpiration<=14) %>% group_by(Date) %>% summarise(avg_atmc_2W = mean(Impliedvolatility))
Combined_1W=merge(otmp_1W,atmc_1W,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_1W" = avg_otmp_1W - avg_atmc_1W)
Combined_2W=merge(otmp_2W,atmc_2W,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_2W" = avg_otmp_2W - avg_atmc_2W)
combined_W = merge(Combined_1W,Combined_2W,by=c("Date"),all.x=TRUE)
combined_W = combined_W %>% select(Date,SKEW_1W,SKEW_2W)
write.csv(combined_W,"/Users/stevenqin/Desktop/5008 project/HSI_SKEW_1W+2W.csv", row.names = FALSE)
putorcall=substr(monthly$Series,9,9)
monthly=cbind(monthly,putorcall)
call_indicator = list("A","B","C","D","E","F","G","H","I","J","K","L")
put_indicator = list("M","N","O","P","Q","R","S","T","U","V","W","X")
dcall2 <- monthly %>% filter(putorcall %in% call_indicator) %>% filter(Impliedvolatility != 0) %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)
dput2 <- monthly %>% filter(putorcall %in% put_indicator) %>% filter(Impliedvolatility != 0)%>% mutate(strikePrice=as.numeric(substr(Series,4,8)))%>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)
otmp_1M <- dput2 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=15 & timeToExpiration<=45) %>% group_by(Date) %>% summarise(avg_otmp_1M=mean(Impliedvolatility))
otmp_1M
## # A tibble: 477 × 2
## Date avg_otmp_1M
## <date> <dbl>
## 1 2020-01-02 21.9
## 2 2020-01-03 23.3
## 3 2020-01-06 23.1
## 4 2020-01-07 23.3
## 5 2020-01-08 24.6
## 6 2020-01-09 22.3
## 7 2020-01-10 22.4
## 8 2020-01-13 22.1
## 9 2020-01-14 22.4
## 10 2020-01-15 22.3
## # … with 467 more rows
atmc_1M <- dcall2 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=15 & timeToExpiration<=45) %>% group_by(Date) %>% summarise(avg_atmc_1M =mean(Impliedvolatility))
atmc_1M
## # A tibble: 477 × 2
## Date avg_atmc_1M
## <date> <dbl>
## 1 2020-01-02 14.1
## 2 2020-01-03 14.8
## 3 2020-01-06 15.5
## 4 2020-01-07 14.2
## 5 2020-01-08 15.1
## 6 2020-01-09 13.6
## 7 2020-01-10 13.1
## 8 2020-01-13 14.2
## 9 2020-01-14 14.4
## 10 2020-01-15 14.1
## # … with 467 more rows
otmp_2M <- dput2 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=46&timeToExpiration<=90) %>% group_by(Date) %>% summarise(avg_otmp_2M = mean(Impliedvolatility))
otmp_2M
## # A tibble: 461 × 2
## Date avg_otmp_2M
## <date> <dbl>
## 1 2020-01-02 20.5
## 2 2020-01-03 21.5
## 3 2020-01-06 21.5
## 4 2020-01-07 21.4
## 5 2020-01-08 22.2
## 6 2020-01-09 20.6
## 7 2020-01-10 20.2
## 8 2020-01-13 20.3
## 9 2020-01-14 20.2
## 10 2020-01-15 20.1
## # … with 451 more rows
atmc_2M <- dcall2 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=46&timeToExpiration<=90) %>% group_by(Date) %>% summarise(avg_atmc_2M = mean(Impliedvolatility))
Combined_1M=merge(otmp_1M,atmc_1M,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_1M" = avg_otmp_1M - avg_atmc_1M)
Combined_2M=merge(otmp_2M,atmc_2M,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_2M" = avg_otmp_2M - avg_atmc_2M)
combined_M = merge(Combined_1M,Combined_2M,by=c("Date"),all.x=TRUE)
combined_M = combined_M %>% select(Date,SKEW_1M,SKEW_2M)
write.csv(combined_M,"/Users/stevenqin/Desktop/5008 project/HSI_SKEW_1M+2M.csv", row.names = FALSE)
setwd("/Users/stevenqin/Desktop/5008 project/APPLE_OPTIONS_CSV")
fnames <- list.files()
csv <- lapply(fnames, read.csv)
df_aapl <- do.call(rbind, csv)
setwd("/Users/stevenqin/Desktop/5008 project/TESLA_OPTION_CSV")
fnames <- list.files()
csv <- lapply(fnames, read.csv)
df_tsla <- do.call(rbind, csv)
setwd("/Users/stevenqin/Desktop/5008 project/SPX500_OPTIONS_CSV")
fnames <- list.files()
csv <- lapply(fnames, read.csv)
df_spx <- do.call(rbind, csv)
setwd("/Users/stevenqin/Desktop/5008 project")
TSLA = read_csv('TSLA.csv')
## Rows: 505 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
HSI = read_csv('HSI.csv')
## Rows: 494 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
AAPL = read_csv('Apple_Stock.csv')
## Rows: 504 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
SPX = read_csv('NEWSP500.csv')
## Rows: 505 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Date, Change %
## dbl (10): Price, Open, High, Low, SMA5, SMA20, SMA50, RSI, MOMEN, ATR
## lgl (1): Vol.
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sigma_TSLA = sd(log(1+(TSLA$`Adj Close`[-1]-TSLA$`Adj Close`[-nrow(TSLA)])/TSLA$`Adj Close`[-nrow(TSLA)]))
sigma_HSI = sd(log(1+(HSI$`Adj Close`[-1]-HSI$`Adj Close`[-nrow(HSI)])/HSI$`Adj Close`[-nrow(HSI)]))
sigma_AAPL = sd(log(1+(AAPL$`Adj Close`[-1]-AAPL$`Adj Close`[-nrow(AAPL)])/AAPL$`Adj Close`[-nrow(AAPL)]))
sigma_SPX = sd(log(1+(SPX$`Price` [-1]-SPX$`Price`[-nrow(SPX)])/SPX$`Price`[-nrow(SPX)]))
print('sigma_TSLA is')
## [1] "sigma_TSLA is"
cat(sigma_TSLA)
## 0.04675678
print('sigma_HSI is')
## [1] "sigma_HSI is"
cat(sigma_HSI)
## 0.01375497
print('sigma_AAPL is')
## [1] "sigma_AAPL is"
cat(sigma_AAPL)
## 0.02362969
print('sigma_SPX is')
## [1] "sigma_SPX is"
cat(sigma_SPX)
## 0.01652697
#APPLE
df_aapl1 = df_aapl %>% select("X.QUOTE_DATE.",
"X.STRIKE." ,
"X.UNDERLYING_LAST." ,
"X.EXPIRE_DATE.",
"X.DTE." ,
"X.STRIKE_DISTANCE_PCT.",
"X.P_IV.",
"X.C_IV.",
) %>% filter( !is.na(X.P_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0.1,0.2))
put_1m = df_aapl1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1m =mean(`X.P_IV.`,na.rm = TRUE))
put_1w = df_aapl1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1w =mean(`X.P_IV.`,na.rm = TRUE))
put_2w = df_aapl1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2w =mean(`X.P_IV.`,na.rm = TRUE))
put_2m = df_aapl1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2m =mean(`X.P_IV.`,na.rm = TRUE))
df_aapl1 = df_aapl %>% select("X.QUOTE_DATE.",
"X.STRIKE." ,
"X.UNDERLYING_LAST." ,
"X.EXPIRE_DATE.",
"X.DTE." ,
"X.STRIKE_DISTANCE_PCT.",
"X.P_IV.",
"X.C_IV.",
) %>% filter( !is.na(X.C_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0,0.1))
call_1m = df_aapl1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1m =mean(`X.C_IV.`,na.rm = TRUE))
call_1w = df_aapl1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1w =mean(`X.C_IV.`,na.rm = TRUE))
call_2w = df_aapl1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2w =mean(`X.C_IV.`,na.rm = TRUE))
call_2m = df_aapl1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2m =mean(`X.C_IV.`,na.rm = TRUE))
combined_1m =merge(call_1m,put_1m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1M" = `put_avg_score_1m`- `call_avg_score_1m`)
combined_1w =merge(call_1w,put_1w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1W" = `put_avg_score_1w`- `call_avg_score_1w`)
combined_2w =merge(call_2w,put_2w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2W" = `put_avg_score_2w`- `call_avg_score_2w`)
combined_2m =merge(call_2m,put_2m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2M" = `put_avg_score_2m`- `call_avg_score_2m`)
combined_aapl <- combined_1m %>%
left_join(combined_1w, by='X.QUOTE_DATE.') %>%
left_join(combined_2w, by='X.QUOTE_DATE.')%>%
left_join(combined_2m, by='X.QUOTE_DATE.')
#TSLA
df_tsla1 = df_tsla %>% select("X.QUOTE_DATE.",
"X.STRIKE." ,
"X.UNDERLYING_LAST." ,
"X.EXPIRE_DATE.",
"X.DTE." ,
"X.STRIKE_DISTANCE_PCT.",
"X.P_IV.",
"X.C_IV.",
) %>% filter( !is.na(X.P_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0.2,0.4))
put_1m = df_tsla1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1m =mean(`X.P_IV.`,na.rm = TRUE))
put_1w = df_tsla1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1w =mean(`X.P_IV.`,na.rm = TRUE))
put_2w = df_tsla1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2w =mean(`X.P_IV.`,na.rm = TRUE))
put_2m = df_tsla1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2m =mean(`X.P_IV.`,na.rm = TRUE))
df_tsla1 = df_tsla %>% select("X.QUOTE_DATE.",
"X.STRIKE." ,
"X.UNDERLYING_LAST." ,
"X.EXPIRE_DATE.",
"X.DTE." ,
"X.STRIKE_DISTANCE_PCT.",
"X.P_IV.",
"X.C_IV.",
) %>% filter( !is.na(X.C_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0,0.2))
call_1m = df_tsla1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1m =mean(`X.C_IV.`,na.rm = TRUE))
call_1w = df_tsla1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1w =mean(`X.C_IV.`,na.rm = TRUE))
call_2w = df_tsla1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2w =mean(`X.C_IV.`,na.rm = TRUE))
call_2m = df_tsla1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2m =mean(`X.C_IV.`,na.rm = TRUE))
combined_1m =merge(call_1m,put_1m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1M" = `put_avg_score_1m`- `call_avg_score_1m`)
combined_1w =merge(call_1w,put_1w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1W" = `put_avg_score_1w`- `call_avg_score_1w`)
combined_2w =merge(call_2w,put_2w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2W" = `put_avg_score_2w`- `call_avg_score_2w`)
combined_2m =merge(call_2m,put_2m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2M" = `put_avg_score_2m`- `call_avg_score_2m`)
combined_tsla <- combined_1m %>%
left_join(combined_1w, by='X.QUOTE_DATE.') %>%
left_join(combined_2w, by='X.QUOTE_DATE.')%>%
left_join(combined_2m, by='X.QUOTE_DATE.')
#SPX
df_spx1 = df_spx %>% select("X.QUOTE_DATE.",
"X.STRIKE." ,
"X.UNDERLYING_LAST." ,
"X.EXPIRE_DATE.",
"X.DTE." ,
"X.STRIKE_DISTANCE_PCT.",
"X.P_IV.",
"X.C_IV.",
) %>% filter( !is.na(X.P_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0.05,0.2))
put_1m = df_spx1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1m =mean(`X.P_IV.`,na.rm = TRUE))
put_1w = df_spx1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1w =mean(`X.P_IV.`,na.rm = TRUE))
put_2w = df_spx1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2w =mean(`X.P_IV.`,na.rm = TRUE))
put_2m = df_spx1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2m =mean(`X.P_IV.`,na.rm = TRUE))
df_spx1 = df_spx %>% select("X.QUOTE_DATE.",
"X.STRIKE." ,
"X.UNDERLYING_LAST." ,
"X.EXPIRE_DATE.",
"X.DTE." ,
"X.STRIKE_DISTANCE_PCT.",
"X.P_IV.",
"X.C_IV.",
) %>% filter( !is.na(X.C_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0,0.05))
call_1m = df_spx1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1m =mean(`X.C_IV.`,na.rm = TRUE))
call_1w = df_spx1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1w =mean(`X.C_IV.`,na.rm = TRUE))
call_2w = df_spx1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2w =mean(`X.C_IV.`,na.rm = TRUE))
call_2m = df_spx1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2m =mean(`X.C_IV.`,na.rm = TRUE))
combined_1m =merge(call_1m,put_1m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1M" = `put_avg_score_1m`- `call_avg_score_1m`)
combined_1w =merge(call_1w,put_1w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1W" = `put_avg_score_1w`- `call_avg_score_1w`)
combined_2w =merge(call_2w,put_2w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2W" = `put_avg_score_2w`- `call_avg_score_2w`)
combined_2m =merge(call_2m,put_2m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2M" = `put_avg_score_2m`- `call_avg_score_2m`)
combined_spx <- combined_1m %>%
left_join(combined_1w, by='X.QUOTE_DATE.') %>%
left_join(combined_2w, by='X.QUOTE_DATE.')%>%
left_join(combined_2m, by='X.QUOTE_DATE.')
result_aapl = combined_aapl %>% select("X.QUOTE_DATE.",
"SKEW_1W",
"SKEW_2W",
"SKEW_1M",
"SKEW_2M",)
write.csv(result_aapl,'AAPL_SKEW.csv')
result_tsla = combined_tsla %>% select("X.QUOTE_DATE.",
"SKEW_1W",
"SKEW_2W",
"SKEW_1M",
"SKEW_2M",)
write.csv(result_tsla,'TSLA_SKEW.csv')
result_spx = combined_spx %>% select("X.QUOTE_DATE.",
"SKEW_1W",
"SKEW_2W",
"SKEW_1M",
"SKEW_2M",)
write.csv(result_spx,'SPX_SKEW.csv')
library(ggplot2)
plot(result_spx)
#HSI
ggplot(data=combined_W, aes(x=Date, y=SKEW_1W, group=1)) +
geom_line()+
geom_point()
ggplot(data=combined_W, aes(x=Date, y=SKEW_2W, group=1)) +
geom_line()+
geom_point()
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 29 rows containing missing values (`geom_point()`).
ggplot(data=combined_M, aes(x=Date, y=SKEW_1M, group=1)) +
geom_line()+
geom_point()
ggplot(data=combined_M, aes(x=Date, y=SKEW_2M, group=1)) +
geom_line()+
geom_point()
## Warning: Removed 23 rows containing missing values (`geom_line()`).
## Warning: Removed 23 rows containing missing values (`geom_point()`).
#AAPL
ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_1W, group=1)) +
geom_line()+
geom_point()
ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
geom_line()+
geom_point()
## Warning: Removed 10 rows containing missing values (`geom_point()`).
ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_1M, group=1)) +
geom_line()+
geom_point()
ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_2M, group=1)) +
geom_line()+
geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).
#TSLA
ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_1W, group=1)) +
geom_line()+
geom_point()
ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
geom_line()+
geom_point()
## Warning: Removed 10 rows containing missing values (`geom_point()`).
ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_1M, group=1)) +
geom_line()+
geom_point()
ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
geom_line()+
geom_point()
## Warning: Removed 10 rows containing missing values (`geom_point()`).
#SPX
ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_1W, group=1)) +
geom_line()+
geom_point()
ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
geom_line()+
geom_point()
ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_1M, group=1)) +
geom_line()+
geom_point()
ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_2M, group=1)) +
geom_line()+
geom_point()
#HSI
hist(combined_W$SKEW_1W, col='#5BBCBF', xlim=)
hist(combined_W$SKEW_2W, col='#93402E', add=TRUE)
hist(combined_M$SKEW_1M, col='#D5D76C', add=TRUE)
hist(combined_M$SKEW_2M, col='#A771E1', add=TRUE)
#AAPL
hist(result_aapl$SKEW_1W, col='#5BBCBF', xlim=)
hist(result_aapl$SKEW_2W, col='#93402E', add=TRUE)
hist(result_aapl$SKEW_1M, col='#D5D76C', add=TRUE)
hist(result_aapl$SKEW_2M, col='#A771E1', add=TRUE)
#TSLA
hist(result_tsla$SKEW_1W, col='#5BBCBF', xlim=)
hist(result_tsla$SKEW_2W, col='#93402E', add=TRUE)
hist(result_tsla$SKEW_1M, col='#D5D76C', add=TRUE)
hist(result_tsla$SKEW_2M, col='#A771E1', add=TRUE)
#TSLA
hist(result_spx$SKEW_1W, col='#5BBCBF', xlim=)
hist(result_spx$SKEW_2W, col='#93402E', add=TRUE)
hist(result_spx$SKEW_1M, col='#D5D76C', add=TRUE)
hist(result_spx$SKEW_2M, col='#A771E1', add=TRUE)